Preparations

Load the necessary libraries

library(rstanarm)   #for fitting models in STAN
library(brms)       #for fitting models in STAN
library(coda)       #for diagnostics
library(ggmcmc)     #for MCMC diagnostics
library(bayesplot)  #for diagnostics
library(rstan)      #for interfacing with STAN
library(DHARMa)     #for residual diagnostics
library(emmeans)    #for marginal means etc
library(broom)      #for tidying outputs
library(broom.mixed) #for tidying mixed/Bayesian model outputs
library(tidybayes)  #for more tidying outputs
library(ggeffects)  #for partial plots
library(tidyverse)  #for data wrangling etc
library(patchwork)  #for composing multi-panel figures
theme_set(theme_classic())  # global ggplot theme for all figures below

Scenario

Here is a modified example from Quinn and Keough (2002). Day and Quinn (1989) described an experiment that examined how rock surface type affected the recruitment of barnacles to a rocky shore. The experiment had a single factor, surface type, with 4 treatments or levels: algal species 1 (ALG1), algal species 2 (ALG2), naturally bare surfaces (NB) and artificially scraped bare surfaces (S). There were 5 replicate plots for each surface type and the response (dependent) variable was the number of newly recruited barnacles on each plot after 4 weeks.

Six-plated barnacle

Format of day.csv data files

treat barnacle
ALG1 27
.. ..
ALG2 24
.. ..
NB 9
.. ..
S 12
.. ..
treat Categorical listing of surface types. ALG1 = algal species 1, ALG2 = algal species 2, NB = naturally bare surface, S = scraped bare surface.
barnacle The number of newly recruited barnacles on each plot after 4 weeks.

Read in the data

# Read the barnacle recruitment data, clean the column names, and set NB
# (naturally bare) as the reference level so all treatment effects are
# contrasts against the naturally bare surface.
day <- read_csv('../data/day.csv', trim_ws=TRUE) %>%
  janitor::clean_names() %>%
  mutate(treat = fct_relevel(treat, c("NB", "S", "ALG1", "ALG2")))
glimpse(day)
## Rows: 20
## Columns: 2
## $ treat    <fct> ALG1, ALG1, ALG1, ALG1, ALG1, ALG2, ALG2, ALG2, ALG2, ALG2, …
## $ barnacle <dbl> 27, 19, 18, 23, 25, 24, 33, 27, 26, 32, 9, 13, 17, 14, 22, 1…

Model formula: \[ \begin{align} y_i &\sim{} \mathcal{Pois}(\lambda_i)\\ ln(\lambda_i) &= \boldsymbol{\beta_k} \bf{X_i}\\ \beta_0 &\sim{} \mathcal{N}(0,10)\\ \boldsymbol{\beta_{k}} &\sim{} \mathcal{N}(0,1)\\ \end{align} \]

where \(\boldsymbol{\beta}\) is a vector of effects parameters and \(\bf{X}\) is a model matrix representing the intercept and treatment contrasts for the effects of Treatment on barnacle recruitment.

Exploratory data analysis

# Exploratory plots: distribution of recruit counts per surface type,
# with the raw observations overlaid in red.
p_base <- ggplot(day, aes(y = barnacle, x = treat))

p_base + geom_boxplot() + geom_point(color = 'red')

p_base + geom_violin() + geom_point(color = 'red')

Fit the model

Priors for a poisson:

Find the mean of the first group:

# Empirical mean and SD of the reference group (NB) to inform the intercept prior
day %>% filter(treat == "NB") %>% summarise(mean(barnacle), sd(barnacle))

But on the log scale:

# The NB mean is 15 barnacles; the model works on the log link scale:
log(15)
## [1] 2.70805

Close to 3, so can use that!

Next, set the standard deviation of the normal prior for the intercept to something suitably wide on the log scale (here 5). The effect parameters will be given a prior with a similar standard deviation.

Finally, set

# Weakly informative priors on the log-link scale: the intercept is centred
# near log(15) ~= 3 (the NB group mean), effects centred on zero; sd = 5 is
# deliberately wide so the data dominate.
priors <- 
  prior(normal(3,5), class = "Intercept") +
  prior(normal(0,5), class = "b")
# Prior-predictive model: sample_prior = "only" ignores the likelihood so we
# can inspect what the priors alone imply about barnacle counts.
day_brm1 <- brm(bf(barnacle ~ treat,
                family = poisson(link = "log")),
                data = day, 
                prior = priors,
                sample_prior = "only", # predictive prior distribution
                save_pars = save_pars(all = TRUE), # replaces deprecated save_all_pars
                iter = 5000, warmup = 5000/2, chains = 3, thin = 5)
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DBOOST_NO_AUTO_PTR  -include '/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk   -fPIC  -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:16: error: expected ';' after top level declarator
## namespace Eigen {
##                ^
##                ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
##          ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
## 
## SAMPLING FOR MODEL '2d62460de406c7c10ee7029240797c87' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 1.4e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.14 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 5000 [  0%]  (Warmup)
## Chain 1: Iteration:  500 / 5000 [ 10%]  (Warmup)
## Chain 1: Iteration: 1000 / 5000 [ 20%]  (Warmup)
## Chain 1: Iteration: 1500 / 5000 [ 30%]  (Warmup)
## Chain 1: Iteration: 2000 / 5000 [ 40%]  (Warmup)
## Chain 1: Iteration: 2500 / 5000 [ 50%]  (Warmup)
## Chain 1: Iteration: 2501 / 5000 [ 50%]  (Sampling)
## Chain 1: Iteration: 3000 / 5000 [ 60%]  (Sampling)
## Chain 1: Iteration: 3500 / 5000 [ 70%]  (Sampling)
## Chain 1: Iteration: 4000 / 5000 [ 80%]  (Sampling)
## Chain 1: Iteration: 4500 / 5000 [ 90%]  (Sampling)
## Chain 1: Iteration: 5000 / 5000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.042966 seconds (Warm-up)
## Chain 1:                0.044845 seconds (Sampling)
## Chain 1:                0.087811 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL '2d62460de406c7c10ee7029240797c87' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 2.5e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.25 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 5000 [  0%]  (Warmup)
## Chain 2: Iteration:  500 / 5000 [ 10%]  (Warmup)
## Chain 2: Iteration: 1000 / 5000 [ 20%]  (Warmup)
## Chain 2: Iteration: 1500 / 5000 [ 30%]  (Warmup)
## Chain 2: Iteration: 2000 / 5000 [ 40%]  (Warmup)
## Chain 2: Iteration: 2500 / 5000 [ 50%]  (Warmup)
## Chain 2: Iteration: 2501 / 5000 [ 50%]  (Sampling)
## Chain 2: Iteration: 3000 / 5000 [ 60%]  (Sampling)
## Chain 2: Iteration: 3500 / 5000 [ 70%]  (Sampling)
## Chain 2: Iteration: 4000 / 5000 [ 80%]  (Sampling)
## Chain 2: Iteration: 4500 / 5000 [ 90%]  (Sampling)
## Chain 2: Iteration: 5000 / 5000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 0.040179 seconds (Warm-up)
## Chain 2:                0.047821 seconds (Sampling)
## Chain 2:                0.088 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL '2d62460de406c7c10ee7029240797c87' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 1.7e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.17 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 5000 [  0%]  (Warmup)
## Chain 3: Iteration:  500 / 5000 [ 10%]  (Warmup)
## Chain 3: Iteration: 1000 / 5000 [ 20%]  (Warmup)
## Chain 3: Iteration: 1500 / 5000 [ 30%]  (Warmup)
## Chain 3: Iteration: 2000 / 5000 [ 40%]  (Warmup)
## Chain 3: Iteration: 2500 / 5000 [ 50%]  (Warmup)
## Chain 3: Iteration: 2501 / 5000 [ 50%]  (Sampling)
## Chain 3: Iteration: 3000 / 5000 [ 60%]  (Sampling)
## Chain 3: Iteration: 3500 / 5000 [ 70%]  (Sampling)
## Chain 3: Iteration: 4000 / 5000 [ 80%]  (Sampling)
## Chain 3: Iteration: 4500 / 5000 [ 90%]  (Sampling)
## Chain 3: Iteration: 5000 / 5000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 0.039056 seconds (Warm-up)
## Chain 3:                0.04679 seconds (Sampling)
## Chain 3:                0.085846 seconds (Total)
## Chain 3:
prior_summary(day_brm1)  # confirm which priors were actually applied
# Plot the prior-predictive conditional effects with the raw data overlaid;
# TRUE spelled out rather than the reassignable shorthand T.
g <- day_brm1 %>% conditional_effects() %>% plot(points = TRUE)

# Log y-scale makes the (very wide) prior-predictive spread easier to judge.
g[[1]] + scale_y_log10("Barnacle") + labs(x = "Treatment")

Conclusion: this is a very vague prior.

# Refit with the same priors but now conditioning on the data;
# sample_prior = "yes" also retains prior draws for prior-vs-posterior plots.
day_brm2 <- update(day_brm1, sample_prior = "yes")
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DBOOST_NO_AUTO_PTR  -include '/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk   -fPIC  -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:16: error: expected ';' after top level declarator
## namespace Eigen {
##                ^
##                ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
##          ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
## 
## SAMPLING FOR MODEL '2a5a80bb5842f3d7599632b8fffe60fe' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 2.8e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.28 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 5000 [  0%]  (Warmup)
## Chain 1: Iteration:  500 / 5000 [ 10%]  (Warmup)
## Chain 1: Iteration: 1000 / 5000 [ 20%]  (Warmup)
## Chain 1: Iteration: 1500 / 5000 [ 30%]  (Warmup)
## Chain 1: Iteration: 2000 / 5000 [ 40%]  (Warmup)
## Chain 1: Iteration: 2500 / 5000 [ 50%]  (Warmup)
## Chain 1: Iteration: 2501 / 5000 [ 50%]  (Sampling)
## Chain 1: Iteration: 3000 / 5000 [ 60%]  (Sampling)
## Chain 1: Iteration: 3500 / 5000 [ 70%]  (Sampling)
## Chain 1: Iteration: 4000 / 5000 [ 80%]  (Sampling)
## Chain 1: Iteration: 4500 / 5000 [ 90%]  (Sampling)
## Chain 1: Iteration: 5000 / 5000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.08992 seconds (Warm-up)
## Chain 1:                0.097613 seconds (Sampling)
## Chain 1:                0.187533 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL '2a5a80bb5842f3d7599632b8fffe60fe' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 1.7e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.17 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 5000 [  0%]  (Warmup)
## Chain 2: Iteration:  500 / 5000 [ 10%]  (Warmup)
## Chain 2: Iteration: 1000 / 5000 [ 20%]  (Warmup)
## Chain 2: Iteration: 1500 / 5000 [ 30%]  (Warmup)
## Chain 2: Iteration: 2000 / 5000 [ 40%]  (Warmup)
## Chain 2: Iteration: 2500 / 5000 [ 50%]  (Warmup)
## Chain 2: Iteration: 2501 / 5000 [ 50%]  (Sampling)
## Chain 2: Iteration: 3000 / 5000 [ 60%]  (Sampling)
## Chain 2: Iteration: 3500 / 5000 [ 70%]  (Sampling)
## Chain 2: Iteration: 4000 / 5000 [ 80%]  (Sampling)
## Chain 2: Iteration: 4500 / 5000 [ 90%]  (Sampling)
## Chain 2: Iteration: 5000 / 5000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 0.092257 seconds (Warm-up)
## Chain 2:                0.090127 seconds (Sampling)
## Chain 2:                0.182384 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL '2a5a80bb5842f3d7599632b8fffe60fe' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 1.3e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.13 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 5000 [  0%]  (Warmup)
## Chain 3: Iteration:  500 / 5000 [ 10%]  (Warmup)
## Chain 3: Iteration: 1000 / 5000 [ 20%]  (Warmup)
## Chain 3: Iteration: 1500 / 5000 [ 30%]  (Warmup)
## Chain 3: Iteration: 2000 / 5000 [ 40%]  (Warmup)
## Chain 3: Iteration: 2500 / 5000 [ 50%]  (Warmup)
## Chain 3: Iteration: 2501 / 5000 [ 50%]  (Sampling)
## Chain 3: Iteration: 3000 / 5000 [ 60%]  (Sampling)
## Chain 3: Iteration: 3500 / 5000 [ 70%]  (Sampling)
## Chain 3: Iteration: 4000 / 5000 [ 80%]  (Sampling)
## Chain 3: Iteration: 4500 / 5000 [ 90%]  (Sampling)
## Chain 3: Iteration: 5000 / 5000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 0.091847 seconds (Warm-up)
## Chain 3:                0.095377 seconds (Sampling)
## Chain 3:                0.187224 seconds (Total)
## Chain 3:

Next, compare posterior to prior

# List every quantity stored in the fit (note the prior_* draws kept by
# sample_prior = "yes")
day_brm2 %>% get_variables()
##  [1] "b_Intercept"     "b_treatS"        "b_treatALG1"     "b_treatALG2"    
##  [5] "Intercept"       "prior_Intercept" "prior_b"         "lp__"           
##  [9] "accept_stat__"   "stepsize__"      "treedepth__"     "n_leapfrog__"   
## [13] "divergent__"     "energy__"
# Overlay prior and posterior for the ALG1 effect to check prior influence
hypothesis(day_brm2, "treatALG1 = 0") %>% plot()

Prior is not affecting or restricting the posterior’s shape!

MCMC sampling diagnostics

mcmc_plot(day_brm2, type='combo') # trace + density: good mixing of chains

mcmc_plot(day_brm2, type='acf_bar') # No autocorrelation within chains

mcmc_plot(day_brm2, type='rhat_hist') # convergence: Rhat less than 1.05

mcmc_plot(day_brm2, type='neff_hist') # ratio of effective to total draws greater than 0.5 (50%)

ggs_crosscorrelation(ggs(day_brm2$fit)) # some cross-correlation between parameters

ggs_grb(ggs(day_brm2$fit)) # Gelman-Rubin-Brooks scale-reduction plot

Converged on a stable posterior distribution.

Model validation

# Posterior-predictive density overlay: 100 simulated datasets vs the observed
# one. `ndraws` replaces the deprecated `nsamples` argument in current brms.
pp_check(day_brm2, type = "dens_overlay", ndraws = 100)
# NOTE(review): "intervals" appears to expect a numeric x variable name;
# with a factor-only model this call errors — presumably why the original
# author flagged it below. Confirm against the bayesplot ppc_intervals docs.
pp_check(day_brm2, x = "barnacle", type = "intervals")
# not working for some reason!

DHARMa residuals:

# Simulate 250 posterior-predictive draws (rows = draws, cols = observations).
# `ndraws` replaces the deprecated `nsamples` argument; brms' posterior_predict
# has no `summary` argument, so the old `summary = FALSE` was a no-op.
preds <- posterior_predict(day_brm2, ndraws = 250)
day_resids <- createDHARMa(
  simulatedResponse = t(preds),  # DHARMa wants observations in rows, draws in columns
  observedResponse = day$barnacle, # observed counts
  fittedPredictedResponse = apply(preds, 2, median), # median prediction per observation
  integerResponse = TRUE) # Poisson response is integer-valued

plot(day_resids)

Looks good!

Partial effects plots

# Partial (conditional) effect of treatment with the raw data overlaid.
plot(conditional_effects(day_brm2), points = TRUE)

Model investigation

# Tidy coefficient table with highest-posterior-density intervals; prior draws
# and sampler bookkeeping are dropped. Estimates are on the log-link scale.
(x<-tidyMCMC(day_brm2, conf.int = T, conf.method = "HPDinterval",
         drop.pars = c("lp__", "deviance", "prior_Intercept", "prior_b")))
  • The first row is the estimate for the intercept; exponentiating it back to the
    natural (response) scale gives the expected number of barnacles in the
    reference treatment NB.
  • The coefficient for the second group S, once exponentiated, gives the
    multiplicative change in barnacle numbers relative to the first treatment.
  • etc…

R^2

# Bayesian R^2 across posterior draws, summarised as a median with a
# highest-density continuous interval.
median_hdci(bayes_R2(day_brm2, summary = FALSE))

~70% explained

Further investigations

How different are the groups?

Pair-wise contrasts

# Pairwise treatment comparisons back-transformed to the response scale
# (ratios of expected counts). emmeans derives factor levels from the model
# itself, so the malformed unnamed `at = list(...)` the original carried
# (its own comment noted it was unnecessary) has been removed; `at` is only
# needed for covariates, and then as a *named* list.
day_brm2 %>%
  emmeans(~treat, type = "response") %>%
  pairs()
##  contrast    ratio lower.HPD upper.HPD
##  NB / S      1.137     0.808     1.582
##  NB / ALG1   0.673     0.485     0.872
##  NB / ALG2   0.528     0.383     0.676
##  S / ALG1    0.593     0.430     0.783
##  S / ALG2    0.462     0.330     0.603
##  ALG1 / ALG2 0.786     0.598     0.989
## 
## Point estimate displayed: median 
## Results are back-transformed from the log scale 
## HPD interval probability: 0.95
# Same comparisons, but regrid() back-transforms the posterior grid first so
# pairs() yields absolute differences in counts rather than ratios.
# (Unnamed `at = list(...)` removed — emmeans infers factor levels itself.)
day_brm2 %>%
  emmeans(~treat) %>%
  regrid() %>%
  pairs() # HPD intervals may be wrong...
##  contrast    estimate lower.HPD upper.HPD
##  NB - S          1.81     -2.79     6.682
##  NB - ALG1      -7.25    -12.88    -2.283
##  NB - ALG2     -13.31    -19.63    -8.073
##  S - ALG1       -9.02    -14.22    -4.187
##  S - ALG2      -15.15    -20.84    -9.316
##  ALG1 - ALG2    -5.96    -12.90    -0.234
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95
# Work on the link (log) scale and back-transform by hand: exp() of a
# log-scale difference is a response-scale ratio.
(newdata <- day_brm2 %>%
  emmeans(~treat, type = "link") %>% # stay on log scale; exponentiate draws below
  pairs() %>%
  gather_emmeans_draws() %>%
  mutate(fit = exp(.value)))
newdata %>% median_hdci(fit)  # median ratio + HPD interval per contrast
newdata_p <- newdata %>% summarise(P = sum(fit > 1) / n())  # P(ratio > 1) per contrast

Slab plot

# Posterior densities of the pairwise ratios, shaded by credible interval.
# after_stat() replaces the deprecated stat() (ggplot2 >= 3.4).
newdata %>% 
  ggplot() +
  geom_vline(xintercept = 1, linetype = "dashed") + # ratio of 1 = no difference
  stat_slab(aes(x = fit, y = contrast,
                fill = after_stat(ggdist::cut_cdf_qi(cdf, .width = c(0.5, 0.8, 0.95),
                                      labels = scales::percent_format()))), color = "black") +
  scale_fill_brewer("Interval", direction = -1, na.translate = FALSE, palette = "YlOrRd") +
  geom_text(data = newdata_p, aes(y = contrast, x = 1, label = paste("P =",round(P,3))), hjust = 0, position = position_nudge(y=0.5))

Planned comparisons

With a Bayesian approach we don’t need to adjust for the number of planned comparisons!

Compare 4+ different things using a contrast matrix:

# Check the factor level order — contrast matrix rows must follow it exactly
levels(day$treat)
## [1] "NB"   "S"    "ALG1" "ALG2"
# Planned contrasts; columns are contrasts, rows follow the factor level
# order NB, S, ALG1, ALG2. Each column sums to zero; the comparison of
# interest is coded positive.
cmat <- matrix(
  c( 0,    0,   -1,    1,    # Alg2_Alg1: ALG2 vs ALG1
     1,   -1,    0,    0,    # NB_S: naturally bare vs scraped
    -0.5, -0.5,  0.5,  0.5,  # Alg_Bare: algal average vs bare average
    -1,    0,    0.5,  0.5), # Alg_NB: algal average vs naturally bare
  nrow = 4,
  dimnames = list(NULL, c("Alg2_Alg1", "NB_S", "Alg_Bare", "Alg_NB"))
)
# Draws for each planned contrast on the log scale, exponentiated to ratios;
# the trailing pipe computes P(ratio > 1) per contrast into day_em_p while
# day_em keeps the full set of draws.
(day_em <- day_brm2 %>%
  emmeans(~treat, type = "link") %>% # stay on log scale; exponentiate below
  contrast(method = list(treat= cmat)) %>%
  gather_emmeans_draws() %>%
  mutate(fit = exp(.value))) %>%
  summarise(P = sum(fit > 1)/n()) -> day_em_p# probability of an effect (ratio > 1)
  # median_hdci(fit)
day_em_p

What is the probability that the value is 50% higher?

# Probability each planned contrast exceeds a 50% increase (ratio > 1.5)
day_em %>% summarise(P = sum(fit > 1.5)/n())

High probability that algal surfaces recruit more barnacles than bare surfaces, and likewise that algal surfaces exceed the naturally bare surface, but little evidence of a difference between the two algal species or between scraped and naturally bare surfaces.

Slab plot

# Posterior densities of the planned-contrast ratios, with reference lines at
# no effect (1) and a 50% increase (1.5).
# after_stat() replaces the deprecated stat() (ggplot2 >= 3.4).
day_em %>% 
  ggplot() +
  geom_vline(xintercept = c(1,1.5), linetype = "dashed") +
  stat_slab(aes(x = fit, y = contrast,
                fill = after_stat(ggdist::cut_cdf_qi(cdf, .width = c(0.5, 0.8, 0.95),
                                      labels = scales::percent_format()))), color = "black") +
  scale_fill_brewer("Interval", direction = -1, na.translate = FALSE, palette = "YlOrRd") +
  geom_text(data = day_em_p, aes(y = contrast, x = 1, label = paste("P =", round(P,3))), hjust = 1, position = position_nudge(y=0.8))

Summary Figure

# Refit for the summary-figure workflow with vaguer priors on the log scale.
day_form <- bf(barnacle ~ treat,  family=poisson(link='log'))
get_prior(day_form,  data=day)  # inspect which parameters accept priors
day_priors <- c(
  prior(normal(0, 10),  class='Intercept'),
  prior(normal(0, 2.5), class='b')
)
day_brms <- brm(day_form, data=day,
                prior=day_priors, 
                 chains=3,  iter=5000,  warmup=2000, thin=5,
                 refresh=0)  # refresh=0 suppresses the sampler progress output

plot(day_brms)  # trace and density plots for each parameter
mcmc_plot(day_brms,  type='acf_bar')   # autocorrelation within chains
mcmc_plot(day_brms,  type='rhat_hist') # convergence: Rhat should be < 1.05
mcmc_plot(day_brms,  type='neff_hist') # effective-to-total sample size ratios

# DHARMa residual check for the refitted model. `ndraws` replaces the
# deprecated `nsamples`; brms' posterior_predict has no `summary` argument,
# so the old `summary = FALSE` was a no-op and has been dropped.
preds <- posterior_predict(day_brms, ndraws = 250)
day_resids <- createDHARMa(simulatedResponse = t(preds), # observations in rows
                            observedResponse = day$barnacle,
                           fittedPredictedResponse = apply(preds, 2, median),
                           integerResponse = TRUE) # Poisson counts
plot(day_resids)

                                        #pp_check(day_brmsP, x=as.numeric(day$treat),'intervals')


# Three equivalent routes to partial/marginal effect plots for treatment
ggpredict(day_brms, term='treat') %>% plot
ggpredict(day_brms, ~treat) %>% plot
ggemmeans(day_brms, ~treat) %>% plot

summary(day_brms)  # posterior summaries with Rhat and ESS for all parameters

# HPD intervals plus convergence diagnostics from the underlying stanfit
tidyMCMC(day_brms$fit, conf.int=TRUE,
         conf.method='HPDinterval', rhat=TRUE,ess=TRUE)

# Pairwise comparisons
library(emmeans)  # already attached above; harmless to repeat
## Pairwise comparisons among surface types, back-transformed to count ratios
emmeans(day_brms, pairwise~treat, type='response')
## Posterior draws of each contrast for probability statements
day_em <- emmeans(day_brms, pairwise~treat, type='link')$contrasts %>%
    gather_emmeans_draws() %>%
    mutate(Fit=exp(.value))  # exp() turns log-scale differences into ratios
day_em %>% head
# Ratio posterior per contrast (1 = no difference). The redundant group_by()
# before ggplot() was removed — facet_wrap() already splits by contrast.
day_em %>%
    ggplot(aes(x=Fit)) +
    geom_histogram() +
    geom_vline(xintercept=1, color='red') + 
    facet_wrap(~contrast, scales='free')
day_em %>% group_by(contrast) %>% median_hdi(.width=c(0.8, 0.95))

# Summarise each contrast with 80% and 95% highest-density credible intervals.
day_sum <- median_hdci(group_by(day_em, contrast), .width = c(0.8, 0.95))
day_sum
# Caterpillar plot: point = median ratio; thick/thin ranges = 80%/95% HDIs.
ggplot(day_sum) +
  geom_hline(yintercept = 1, linetype = 'dashed') +
  geom_pointrange(aes(x = contrast, y = Fit,
                      ymin = Fit.lower, ymax = Fit.upper,
                      size = factor(.width)),
                  show.legend = FALSE) +
  scale_size_manual(values = c(1, 0.5)) +
  coord_flip()

# Same caterpillar plot on a log2 y-scale, so ratios are symmetric about 1
# (0.5 and 2 sit at equal distances from the reference line).
g1 <- ggplot(day_sum) +
  geom_hline(yintercept = 1) +
  geom_pointrange(aes(x = contrast, y = Fit,
                      ymin = Fit.lower, ymax = Fit.upper,
                      size = factor(.width)),
                  show.legend = FALSE) +
  scale_size_manual(values = c(1, 0.5)) +
  scale_y_continuous(trans = scales::log2_trans(), breaks = c(0.5, 1, 2, 4)) +
  coord_flip()
g1
                                        # Probability of effect
day_em %>% group_by(contrast) %>% summarize(P=sum(.value>0)/n())  # on the log scale
day_em %>% group_by(contrast) %>% summarize(P=sum(Fit>1)/n())     # equivalent, on the ratio scale
##Probability of an effect greater than 10% (ratio > 1.1)
day_em %>% group_by(contrast) %>% summarize(P=sum(Fit>1.1)/n())

##Planned contrasts
# BUG FIX: the original rows assumed the default alphabetical level order
# (ALG1, ALG2, NB, S), but day$treat was releveled to NB, S, ALG1, ALG2
# (see levels(day$treat) above), so every labeled contrast computed the
# wrong comparison. Rows now follow the actual level order, matching the
# contrast matrix used earlier in this document.
cmat <- cbind('Alg2_Alg1' = c(0, 0, -1, 1),          # ALG2 vs ALG1
              'NB_S'      = c(1, -1, 0, 0),          # naturally bare vs scraped
              'Alg_Bare'  = c(-0.5, -0.5, 0.5, 0.5), # algal average vs bare average
              'Alg_NB'    = c(-1, 0, 0.5, 0.5))      # algal average vs naturally bare
#crossprod(cmat)  # check orthogonality of the planned contrasts
emmeans(day_brms, ~treat, contr=list(treat=cmat), type='link')      # log scale
emmeans(day_brms, ~treat, contr=list(treat=cmat), type='response')  # count (ratio) scale
# Draws for each planned contrast, back-transformed to ratios
day_em = emmeans(day_brms, ~treat, contr=list(treat=cmat), type='link')$contrasts %>%
      gather_emmeans_draws() %>%
      mutate(Fit=exp(.value)) 
day_em %>% group_by(contrast) %>% median_hdci()  # median ratio + HPD interval
# Probability of an effect (ratio > 1)
day_em %>% group_by(contrast) %>% summarize(P=sum(Fit>1)/n())
##Probability of an effect greater than 10% (ratio > 1.1)
day_em %>% group_by(contrast) %>% summarize(P=sum(Fit>1.1)/n())

hist(bayes_R2(day_brms, summary=FALSE))  # posterior distribution of Bayesian R^2

bayes_R2(day_brms, summary=FALSE) %>% median_hdi  # median R^2 with HPD interval


## Summary plot
# NOTE(review): day_grid is never used below — retained for reference only.
day_grid <- with(day, list(treat=levels(treat)))
# Expected recruitment per treatment on the response (count) scale.
# (This overwrites the earlier `newdata` object of contrast draws.)
newdata <- emmeans(day_brms, ~treat, type='response') %>% as.data.frame
head(newdata)
g2 <- ggplot(newdata, aes(y=rate, x=treat)) +
  geom_pointrange(aes(ymin=lower.HPD, ymax=upper.HPD))

library(patchwork)  # already attached above; used to compose the two panels
g1 + g2  # contrasts panel alongside the per-treatment means panel

References

Quinn, G. P., and K. J. Keough. 2002. Experimental Design and Data Analysis for Biologists. London: Cambridge University Press.